PyCaret 数据挖掘实践
# Experiment initialisation (when running inside a Notebook environment).
# `data` must already be defined before this point; 'is_canceled' is passed
# as the target column.
clf1 = setup(
    data,
    target="is_canceled",
    train_size=0.7,
    silent=True,
)
# Alternative initialisation outside a Notebook environment:
#   clf1 = setup(data, target='target-variable', html=False)
# Alternative initialisation for remote execution (Kaggle / GitHub Actions /
# CI-CD pipelines):
#   clf1 = setup(data, target='target-variable', html=False, silent=True)
#
# (Original author's note: features are extracted and missing values are
# filled in automatically.)
#
# Optional model comparison, currently disabled:
#   best = compare_models()
#   print('best is ')
#   print(best)
#   # --- print the importance share of each feature field
#   print(best.feature_importances_)

# Build a CatBoost model; `best` and `ct1` both refer to the same object.
best = ct1 = create_model("catboost")

# NOTE(review): `test_input` is not read anywhere in this section, and
# get_testdata() below receives the literal string 'test_data' instead --
# confirm which of the two is intended.
test_input = "hotel_bookings_test.csv"

# No `data=` argument here -- presumably this scores the hold-out split
# (going by the variable name); verify against the PyCaret version in use.
pred_holdout = predict_model(ct1)

# Score the separately loaded test data with the same model.
test_data = get_testdata("test_data")
pred_test_data = predict_model(ct1, data=test_data)

# Keep just the 'Label' and 'Score' columns and show the first rows.
prediction_columns = ["Label", "Score"]
y_pred = pred_test_data[prediction_columns]
print(y_pred.head())
评论